In [ ]:
from __future__ import print_function
import os
import numpy as np
import tensorflow as tf
print(tf.__version__)
# CUDA environment: order devices by PCI bus id and make only device "0" visible.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})
#Show images
import matplotlib.pyplot as plt
%matplotlib inline
# plt configuration, applied in one call
plt.rcParams.update({
    'figure.figsize': (10, 10),        # size of images
    'image.interpolation': 'nearest',  # show exact image
    'image.cmap': 'gray',              # use grayscale
})
ATIS (Airline Travel Information System) dataset. Available at: https://github.com/mesnilgr/is13/blob/master/data/load.py
Input (words): show flights from Boston to New York today
Output (labels): O O O B-dept O B-arr I-arr B-date
In [ ]:
# Read data
import pickle
import sys

atis_file = '/home/ubuntu/data/training/text/atis/atis.pkl'

# NOTE(review): unpickling can execute arbitrary code; only load a trusted copy of this file.
with open(atis_file, 'rb') as f:
    if sys.version_info.major != 2:
        # Python 3: 8-bit strings stored in the pickle are read back as bytes objects.
        train, test, dicts = pickle.load(f, encoding='bytes') #python3
    else:
        train, test, dicts = pickle.load(f) #python2.7
In [ ]:
# Dictionaries and train/test partition.
# The keys of the pickled dictionaries are byte strings; decode them to text.
w2idx = {key.decode("utf-8"): idx for key, idx in dicts[b'words2idx'].items()}
ne2idx = {key.decode("utf-8"): idx for key, idx in dicts[b'tables2idx'].items()}
labels2idx = {key.decode("utf-8"): idx for key, idx in dicts[b'labels2idx'].items()}

# Reverse lookups: id -> word and id -> label.
idx2w = {idx: word for word, idx in w2idx.items()}
idx2la = {idx: label for label, idx in labels2idx.items()}

# Each partition is a 3-tuple; the middle element is not used in this notebook.
train_x, _, train_label = train
test_x, _, test_label = test
# Visualize data: first two sentences of every partition as WORD / LABEL columns
wlength = 35
for name, sentences, tags in (('train', train_x, train_label),
                              ('test', test_x, test_label)):
    print(name)
    for words, word_tags in zip(sentences[:2], tags[:2]):
        print( 'WORD'.rjust(wlength), 'LABEL'.rjust(wlength))
        # Translate ids back to text and right-align both columns.
        for word_id, tag_id in zip(words, word_tags):
            print( idx2w[word_id].rjust(wlength), idx2la[tag_id].rjust(wlength))
        # Separator after each sentence.
        print( '\n'+'**'*30+'\n')
In [ ]:
# Select the words tagged with label 48 (b'B-fromloc.city_name') in train and test
# to check that they are different. Only the first five sentences of each partition are scanned.
# NOTE(review): indentation was lost in this export; the trailing blank-line print is
# assumed to run once per partition - confirm against the original notebook.
for name, sentences, tags in (('train', train_x, train_label),
                              ('test', test_x, test_label)):
    print(name)
    print('---------')
    for words, word_tags in zip(sentences[:5], tags[:5]):
        matches = [idx2w[word_id] for word_id, tag_id in zip(words, word_tags) if tag_id == 48]
        for word in matches:
            print(word)
    print('\n')
- Convert the list of word sequences into an array of shape (words × features).
- The features of a word are its context within the sentence.
- For each word in the sentence, build the context from the previous and the next words in the sentence.
- For words at the beginning and the end of a sentence, use padding to complete the context.
In [ ]:
# Padding id: one past the largest word id of the whole vocabulary.
# Deriving it from the ids seen in train_x alone could hand out an id that
# belongs to a word occurring only in the test sentences, so that padding and
# that word would become indistinguishable; idx2w holds every word id.
ID_PAD = max(idx2w) + 1
print('ID_PAD: ', ID_PAD)
def context(l, size=3, pad_id=None):
    """Build a fixed-size context window around every element of a sequence.

    The sequence is padded with ``size // 2`` copies of the padding id on each
    side, and the window for position ``i`` is ``padded[i:i + size]``. The
    element itself therefore sits at index ``size // 2`` of its window; with an
    even ``size`` it gets one element less of right context than of left
    context.

    Args:
        l: Iterable of word ids (list, tuple or 1-D array).
        size: Number of ids per window.
        pad_id: Id used for positions beyond either end of the sequence.
            When omitted, the module-level ``ID_PAD`` is used.

    Returns:
        A list with one window (a list of ``size`` ids) per element of ``l``;
        an empty list for an empty sequence.
    """
    if pad_id is None:
        # Resolved at call time so the notebook-level constant stays the default.
        pad_id = ID_PAD
    l = list(l)
    half = size // 2
    lpadded = half * [pad_id] + l + half * [pad_id]
    return [lpadded[i:i + size] for i in range(len(l))]
# Example: context windows (default size 3) for a toy sequence of five ids
x = np.arange(5, dtype=np.int32)
print('Context vectors: ', context(x))
In [ ]:
# Create train and test X: one row per word, holding that word's context
# window of size 10, with the windows of all sentences stacked in order.
X_trn = np.array([window for sentence in train_x for window in context(sentence, size=10)])
X_tst = np.array([window for sentence in test_x for window in context(sentence, size=10)])
print('X trn shape: ', X_trn.shape)
print('X_tst shape: ',X_tst.shape)
# Create train and test y: the per-sentence label sequences flattened into one
# label per word, in the same order as the rows built from the sentences.
y_trn = np.array([label for sentence_labels in train_label for label in sentence_labels])
print('y_trn shape: ',y_trn.shape)

y_tst = np.array([label for sentence_labels in test_label for label in sentence_labels])
print('y_tst shape: ',y_tst.shape)
In [ ]:
# Number of distinct labels present in the training targets, and vocabulary size.
print('Num labels: ', np.unique(y_trn).size)
print('Num words: ', len(idx2w))
In [ ]:
# General parameters
LOG_DIR = '/tmp/tensorboard/airline/embeddings/'

# Data attributes
input_seq_length = X_trn.shape[1]        # number of ids in one context window
input_vocabulary_size = len(idx2w) + 1   # vocabulary size plus one (presumably for the padding id)
output_length = 127                      # NOTE(review): presumably the number of label classes - confirm

# Model parameters
embedding_size = 64
In [ ]:
# Build the model: softmax classifier on top of the flattened word embeddings
# of a context window.
from tensorflow.contrib.keras import layers, models, optimizers

print('Build model 1')
seq_input = layers.Input(shape=([input_seq_length]), name='prev')

#----------------------------------------
# Put your embedding layer here
#----------------------------------------
# Reference solution: map every word id of the window to a trainable vector of
# embedding_size values. Output shape: (batch, input_seq_length, embedding_size).
embeds = layers.Embedding(input_vocabulary_size, embedding_size, name='embeds')(seq_input)

#----------------------------------------
# You need to do some transformation to connect the embedding out to the dense layer
#----------------------------------------
# Reference solution: a Dense layer acts on the last axis only, so the window's
# vectors are concatenated into one flat feature vector per sample.
flat = layers.Flatten()(embeds)

#----------------------------------------
# Put your final dense layer layer here
#----------------------------------------
# Reference solution: one softmax probability per label class, as required by
# the sparse categorical cross-entropy loss used below.
output = layers.Dense(output_length, activation='softmax', name='output')(flat)

model1 = models.Model(inputs=seq_input, outputs=output)
model1.summary()

# Optimizer
adam_optimizer = optimizers.Adam()
model1.compile(loss='sparse_categorical_crossentropy', optimizer=adam_optimizer, metrics=['accuracy'])
In [ ]:
# Plot the model graph
from tensorflow.contrib.keras import utils

# Write the architecture diagram to disk, then read the image back and display it.
utils.plot_model(model1, to_file='/tmp/model1.png')
model_image = plt.imread('/tmp/model1.png')
plt.imshow(model_image)
In [ ]:
# Fit model: 10 epochs with mini-batches of 128 samples; the test arrays are
# passed as validation data, so validation metrics are recorded each epoch.
history = model1.fit(
    x=X_trn,
    y=y_trn,
    batch_size=128,
    epochs=10,
    validation_data=(X_tst, y_tst),
)
In [ ]:
# Plot training and validation accuracy per epoch in the notebook output.
# Both curves are labelled so they can be told apart in the figure.
plt.plot(history.history['acc'], label='train')
plt.plot(history.history['val_acc'], label='validation')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.title('Model accuracy')
plt.legend()
plt.show()
In [ ]:
# Predict. Score new paragraph
def score_paragraph(paragraph, size=10):
    """Predict one label for every whitespace-separated word of ``paragraph``.

    Each word is mapped to its id through ``w2idx``, wrapped in a context
    window by ``context`` and scored with ``model1``; the highest-scoring class
    of every window is translated back to a label string with ``idx2la``.

    Args:
        paragraph: Text to label; every word must be a key of ``w2idx``.
        size: Context window length. It has to match the window length of the
            arrays the model was trained on (10 in this notebook).

    Returns:
        List of label strings, one per word; an empty list when the paragraph
        contains no words.

    Raises:
        KeyError: If one or more words are not in ``w2idx``; the message lists
            all of them.
    """
    # Preprocess data
    p_w = paragraph.split()
    if not p_w:
        # No words: np.array([]) would be 1-D instead of (n_words, size),
        # which is not a valid batch for the model.
        return []
    unknown = [w for w in p_w if w not in w2idx]
    if unknown:
        # Same exception type as the plain dictionary lookup, but reporting
        # every out-of-vocabulary word at once.
        raise KeyError('Words not in the vocabulary: ' + ', '.join(unknown))
    p_w_c = [w2idx[w] for w in p_w]
    x_score = np.array(context(p_w_c, size=size))
    # Score
    pred_score = model1.predict(x_score)
    # Most probable class per word, converted back to its label name.
    response = [idx2la[l] for l in np.argmax(pred_score, axis=1)]
    return response
paragraph = 'i need a business ticket in any flight with departure from alaska to las vegas monday with breakfast'
response = score_paragraph(paragraph)

# Print every word next to its predicted label, right-aligned in two columns.
wlength = 35
for word, label in zip(paragraph.split(), response):
    print( word.rjust(wlength), label.rjust(wlength))